################################################################################ 
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
# 
# Fig 2
#
################################################################################ 


# Start from an empty global environment so objects left over from an earlier
# session cannot leak into the analysis
rm(list = ls())

# Set to TRUE to write the results table and the figure to disk
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
setsave = TRUE

################################################################################ 
#  LIBRARIES
################################################################################ 

library(hdm)
library(plyr)
library(dplyr)
library(tidyr)
library(readr)
library(glmnet)
library(estimatr)
library(ggplot2)
library(ggpubr)
library(xtable)

# Fix the random seed so that any stochastic step further down is reproducible
set.seed(123)

################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Project root = the folder two levels above the folder that holds this script.
# The script location is read through rstudioapi, so this must run in RStudio.
script_path = rstudioapi::getActiveDocumentContext()$path
wd = dirname(dirname(dirname(script_path)))
wd_res = file.path(wd, 'results')
wd_data = file.path(wd, 'data')


# Load the data (file names are relative to the data folder)
setwd(wd_data)
data = as.data.frame(read.csv('main_dataset.csv'))
# Lasso covariates, without their first element (the first column if the
# stored object is a data frame)
lassodata = read_rds('data_lasso2.RDS')[-1]

# Keep only the rows whose `deleted_suspended` entry is missing
# NOTE(review): `lassodata` is not filtered here, yet it is later indexed with
# row positions of the filtered `data` -- confirm both share the same row order.
keep_rows = is.na(data$deleted_suspended)
data = data[keep_rows, ]


################################################################################ 
#   VARIABLES
################################################################################

# One 0/1 indicator per treatment arm, keyed by its code in `treatment_main`
# (3 = empathy, 2 = consequences, 1 = meme); NA codes stay NA
treatment_codes = c(empathy_dummy = 3, consequences_dummy = 2, meme_dummy = 1)
for (dummy_name in names(treatment_codes)) {
  data[[dummy_name]] = as.numeric(data$treatment_main == treatment_codes[[dummy_name]])
}

################################################################################ 
#   Prepare
################################################################################ 

# Outcome variables: one regression is run per outcome and treatment arm
outvars = c(
  'num_hate_tweets_post',
  'num_tweets_post',
  'post_hate_share_tot',
  'outcome_deleted_specific',
  'deletion_rate',
  'tox_vader_neu_post'
)

# Pre-treatment counterparts of the outcomes (not referenced again in the
# visible part of this script)
pastvars = c(
  'num_hate_tweets_pre',
  'num_tweets_pre',
  'pre_hate_share_tot',
  'tox_vader_neu_pre'
)

# Candidate controls (not referenced again in the visible part of this script)
ctr = c('num_hate_tweets_pre', 'num_tweets_pre')

# Plot category of each outcome, aligned position by position with `outvars`
categories = rep(
  c('Hate Speech\nCreation', 'Hate Speech\nDeletion', 'Tone'),
  times = c(3, 2, 1)
)

# Treatment indicator columns and their display labels (same order)
treatments = c('meme_dummy', 'consequences_dummy', 'empathy_dummy')
treatments_lab = c('Humor', 'Warning of Consequences', 'Empathy')

# Display name of each outcome, aligned position by position with `outvars`
outvars_name = c(
  '# Xenophobic Tweets',
  '# Total Tweets',
  'Xenophobic Tweet Share',
  'Xenophobic Tweet Deleted',
  'Tweet Deletion Rate',
  'Vader Negativity'
)

################################################################################ 
#   Scale variables
################################################################################ 


# Standardise every outcome column in place: scale() centres each column and
# divides it by its standard deviation
data[outvars] = lapply(data[outvars], function(column) scale(column))

# Apply the same standardisation to every lasso covariate and reassemble the
# result as a data frame
lassodata = as.data.frame(lapply(lassodata, function(column) scale(column)))


################################################################################ 
#   Run the regressions with double-selection lasso
################################################################################ 


# Estimate every (outcome, treatment arm) effect against the control group with
# hdm::rlassoEffect (double selection). Models are stored outcome-major:
# for each outcome, one model per entry of `treatments`, in that order.

# Covariate rows are picked with row positions of `data`, so both tables must
# describe the same observations in the same order
stopifnot(nrow(lassodata) == nrow(data))

MODELS = vector('list', length(outvars) * length(treatments))

n = 1

for (j in seq_along(outvars)){

  # Rows whose outcome is missing or not finite cannot enter the regression;
  # this depends on the outcome only, so it is computed once per outcome
  droper = which(!is.finite(data[[outvars[j]]]))

  for (i in seq_along(treatments)){

    # Estimation sample: control rows (treatment_main == 0) plus the rows of
    # arm i, minus the rows with an unusable outcome
    keeper = union(which(data$treatment_main == 0),  which(data$treatment_main == i))
    keeper = setdiff(keeper, droper)

    y = data[keeper, outvars[j]]
    # Treatment indicator of arm i. treatments[i] is the dummy built from
    # treatment_main == i, so within `keeper` it is 1 for arm i and 0 for
    # control. (The script previously read `data$treatment_dummy`, a column it
    # never creates, which yields NULL if the input file does not carry it.)
    d = data[keeper, treatments[i]]
    x = lassodata[keeper, ] %>% as.matrix()

    # Note: an unused logical mask flagging covariate columns 2 and 3 used to
    # be built here but was never passed to rlassoEffect; it is dropped, which
    # leaves the estimation unchanged.
    MODELS[[n]] = rlassoEffect(x, y, d, method = "double selection")

    print('Done with: ')
    print(n)

    n = n+1

  }
}



################################################################################ 
#   Save results in a container
################################################################################ 


MOD = MODELS

# The rep() calls below assume one model per (outcome, treatment) pair, stored
# outcome-major; fail loudly instead of silently recycling labels
n_out = length(outvars)
n_treat = length(treatments)
stopifnot(length(MOD) == n_out * n_treat)

# First element of each model summary, computed once per model. Positions 1, 2
# and 4 of it are used as point estimate, standard error and p-value.
est_tables = lapply(MOD, function(m) summary(m)[[1]])
coef = vapply(est_tables, function(tab) tab[1], numeric(1))
se = vapply(est_tables, function(tab) tab[2], numeric(1))
pval = vapply(est_tables, function(tab) tab[4], numeric(1))

# Entries of coefficients.reg beyond the first two are reported as the
# lasso-selected controls (count and comma-separated names)
regnum = vapply(MOD, function(m) length(m$coefficients.reg) - 2, numeric(1))
sample = vapply(MOD, function(m) m$samplesize, numeric(1))
controls = vapply(MOD,
                  function(m) paste0(names(m$coefficients.reg)[-c(1, 2)], collapse = ', '),
                  character(1))

# One row per model; labels follow the model order (outcome-major)
container = cbind.data.frame(yvar = rep(outvars, each = n_treat),
                             yvar_name = rep(outvars_name, each = n_treat),
                             categories = rep(categories, each = n_treat),
                             treat = rep(treatments, times = n_out),
                             treatments_lab = rep(treatments_lab, times = n_out),
                             coef = coef, se = se, pval = pval,
                             samplesize = sample,
                             lasso_cont_num = regnum,
                             controls = controls
                             )


if (setsave){
  # Make sure the target folder exists before writing into it
  dir.create(file.path(wd_data, 'auxiliary'), showWarnings = FALSE, recursive = TRUE)
  write.csv(container, paste0(wd_data, '/auxiliary/container_lasso_controls.csv'), row.names = FALSE)
}


################################################################################ 
#   Make plot
################################################################################ 

# container = read.csv(paste0(wd_data, '/auxiliary/container_lasso_controls.csv'))

# LaTeX tables of estimate, standard error and p-value, one per treatment arm
est_cols = c('coef', 'se', 'pval')
container[container$treat == 'meme_dummy', est_cols] %>% xtable(digits = c(3))
container[container$treat == 'consequences_dummy', est_cols] %>% xtable(digits = c(3))
container[container$treat == 'empathy_dummy', est_cols] %>% xtable(digits = c(3))


# Give final names to vars: short display name -> multi-line axis label.
# The mean/SD figures in the labels are hard-coded, not computed in this script.
final_labels = c(
  "# Xenophobic Tweets"      = "# Xenophobic\nTweets\n(mean=2.95,\nSD=12.61)",
  "# Total Tweets"           = "# Total\nTweets\n(mean=506,\nSD=704.90)",
  "Xenophobic Tweet Share"   = "Xenophobic\nTweet Share\n(mean=0.01,\nSD=0.05)",
  "Tweet Deletion Rate"      = "Tweet\nDeletion Rate\n(mean=0.02,\nSD=0.09)",
  "Vader Negativity"         = "Vader\nNegativity\n(mean=0.71,\nSD=0.11)",
  "Xenophobic Tweet Deleted" = "Xenophobic\nTweet Deleted\n(mean=0.19,\nSD=0.40)"
)
current_names = as.character(container$yvar_name)
to_relabel = current_names %in% names(final_labels)
container$yvar_name[to_relabel] = unname(final_labels[current_names[to_relabel]])


# Create factors for better plots
# Factor copies (suffix ".f") whose level order is the order of first
# appearance in `container`, so plots keep the table order instead of sorting
# the labels alphabetically
for (label_col in c('yvar_name', 'treatments_lab', 'categories')) {
  label_values = container[[label_col]]
  container[[paste0(label_col, '.f')]] = factor(label_values, levels = unique(label_values))
}


# Create plot
# Coefficient plot for the outcomes that belong to the requested categories.
#   category: character vector; rows of the global `container` whose
#             `categories` value is in this vector are plotted
#   title:    plot title
# Returns a ggplot object: one point per outcome and treatment, a thin bar
# spanning coef +/- 1.96 * se and a thick bar spanning coef +/- 1.645 * se,
# with a dashed vertical reference line at zero.
makeplot = function(category, title){
  plot_rows = container[container$categories %in% category, ]

  # The same vertical offset is applied to points and both sets of bars so
  # that the three treatments do not overlap
  dodge = position_dodge(width = 1/2)

  estimates = ggplot(plot_rows,
                     aes(x = coef, y = yvar_name.f,
                         colour = treatments_lab.f, shape = treatments_lab.f,
                         label = round(coef, 2))) +
    geom_line() +
    geom_point(position = dodge, size = 5) +
    geom_errorbar(aes(xmin = coef - se*1.96, xmax = coef + se*1.96),
                  width = 0, position = dodge, size = 1) +
    geom_errorbar(aes(xmin = coef - se*1.645, xmax = coef + se*1.645),
                  width = 0, position = dodge, size = 2) +
    geom_vline(xintercept = 0, color = "black", linetype = "dashed")

  # Axis titles are blanked and the outcome order is reversed on the y axis
  estimates +
    xlab("") +
    ylab("") +
    theme_light() +
    scale_y_discrete(limits = rev) +
    ggtitle(title) +
    theme(text = element_text(size = 28),
          plot.title = element_text(size = 30),
          axis.text = element_text(color = "black"),
          legend.position = 'bottom',
          legend.box = "horizontal",
          legend.title = element_blank())
}

# Left panel: hate speech creation outcomes; right panel: deletion and tone
gg1 = makeplot(c("Hate Speech\nCreation"), "Hate Speech Creation")
gg2 = makeplot(c("Hate Speech\nDeletion", "Tone"), "Hate Speech Deletion and Overall Tone")

# Both panels side by side with equal widths and one shared legend below
fig2 = ggarrange(gg1, gg2, widths = c(1, 1), common.legend = TRUE, legend = 'bottom')
fig2


# Pass the arranged figure explicitly: without `plot =`, ggsave() writes
# whatever plot happens to be registered as the last one
if (setsave){
  ggsave(paste0(wd_res, '/fig2.png'), plot = fig2, width = 20, height = 8)
}


